
# set working directory path to replication folder IFoT_rep
# curcomp <- "C:/Users/iosgood/Dropbox/Divisions over supply chain/IFoT_rep" 
# setwd(curcomp)

######################
## Load in the data ##
######################

# load in libraries
library(xtable)
library(Zelig)
library(formula.tools)
library(pscl)

# load data for main analysis
usdat <- read.csv("data/IFoT_fta_data.csv", header = T)

## clean the data and add in new measures

# is there a supporting or lobbying association
usdat$supassoc <- usdat$assposadcvd %in% c("Favor","Divided")
usdat$lobassoc <- usdat$numassoclob > 0

# industry divided outcome variable
usdat$divided3 <- (usdat$assposadcvd %in% c("Divided")) | (usdat$numsupfirm >= 3 & (usdat$numoppfirm >=3 | usdat$assposadcvd %in% c("Divided","Oppose"))) |  (usdat$numoppfirm >= 3 & (usdat$numsupfirm >=3 | usdat$assposadcvd %in% c("Divided","Favor")))
mean(usdat$divided3)
mean(usdat$divided3[(usdat$assposadcvd %in% c("Favor","Divided","Oppose")) | (usdat$numsupfirm >= 3) | (usdat$numoppfirm >=3)])
mean(usdat$divided3[(usdat$sector == "manu") & (usdat$assposadcvd %in% c("Favor","Divided","Oppose")) | (usdat$numsupfirm >= 3) | (usdat$numoppfirm >=3)])

usdat$divided1 <- (usdat$assposadcvd %in% c("Divided")) | (usdat$numsupfirm >= 1 & (usdat$numoppfirm >=1 | usdat$assposadcvd %in% c("Divided","Oppose"))) |  (usdat$numoppfirm >= 1 & (usdat$numsupfirm >=1 | usdat$assposadcvd %in% c("Divided","Favor")))
mean(usdat$divided1)
mean(usdat$divided1[(usdat$assposadcvd %in% c("Favor","Divided","Oppose")) | (usdat$numsupfirm >= 1) | (usdat$numoppfirm >=1)])

# fragmentation indices
usdat$posdivindex <- (usdat$numsupfirm + usdat$numoppfirm)/((usdat$assposadcvd %in% c("Divided","Oppose","Favor")) + usdat$numsupfirm + usdat$numoppfirm)
usdat$lobdivindex <- (usdat$numfirmlob)/(usdat$numassoclob  + usdat$numfirmlob)
usdat$lobspnddivindex <- (usdat$spndfirmlob)/(usdat$spndassoclob + usdat$spndfirmlob)

# see beginning of Data section
cor(usdat[is.na(usdat$posdivindex) == FALSE & is.na(usdat$lobdivindex) == FALSE,c("posdivindex","lobdivindex")])
mean(c(usdat$numassoclob >= 1)[usdat$numfirmlob >= 1])

# disaggregated lobbying splits
usdat$firmsnoassoc3 <- (usdat$numsupfirm >= 3 | usdat$numoppfirm >= 3) & (usdat$assposadcvd == "No position")
usdat$assocnofirms3 <- (usdat$numsupfirm < 3 & usdat$numoppfirm < 3) & ((usdat$assposadcvd == "No position") == FALSE)
usdat$assocandfirms3 <-  (usdat$numsupfirm >= 3 | usdat$numoppfirm >= 3) & ((usdat$assposadcvd == "No position") == FALSE)

usdat$firmslobnotassoc3 <- (usdat$numfirmlob >= 3) & (usdat$numassoclob == 0)
usdat$assoclobnotfirms3 <- (usdat$numfirmlob < 3) & (usdat$numassoclob >= 1)
usdat$firmsandassoclob3 <-  (usdat$numfirmlob >= 3) & (usdat$numassoclob >= 1)

# dummy for primary sector
usdat$primary <- as.numeric(usdat$sector %in% c("manu") == FALSE)

# ensure factor variables are correct
usdat$diff <- factor(usdat$diff, c("Homogeneous","Mod. differentiated", "Differentiated"))

# ca variable for 05-09
usdat$nonrpca0509 <- 3; 
usdat$nonrprelexp0509 <- usdat$exp0509 - usdat$nonrpimp0509
usdat$nonrpca0509[usdat$nonrprelexp0509 < quantile(usdat$nonrprelexp0509, .4)] <- 2; 
usdat$nonrpca0509[usdat$nonrprelexp0509 > quantile(usdat$nonrprelexp0509, .6)] <- 4;
usdat$nonrpca0509[usdat$nonrprelexp0509 < quantile(usdat$nonrprelexp0509, .2)] <- 1; 
usdat$nonrpca0509[usdat$nonrprelexp0509 > quantile(usdat$nonrprelexp0509, .8)] <- 5;

# ca variable for 10-14
usdat$nonrpca1014 <- 3; 
usdat$nonrprelexp1014 <- usdat$exp1014 - usdat$nonrpimp1014
usdat$nonrpca1014[usdat$nonrprelexp1014 < quantile(usdat$nonrprelexp1014, .4)] <- 2; 
usdat$nonrpca1014[usdat$nonrprelexp1014 > quantile(usdat$nonrprelexp1014, .6)] <- 4;
usdat$nonrpca1014[usdat$nonrprelexp1014 < quantile(usdat$nonrprelexp1014, .2)] <- 1; 
usdat$nonrpca1014[usdat$nonrprelexp1014 > quantile(usdat$nonrprelexp1014, .8)] <- 5;

# clean up tariff measures
usdat$agrtaravg[is.na(usdat$agrtaravg)] <- 0
usdat$agrcoefvar <- sqrt(usdat$agrtarvar)/(usdat$agrtaravg+.001)

# create noise variable used in simulation functions
usdat$korrelus <- rnorm(nrow(usdat))

# diff inputs
usdat$totusinpd0509 <- usdat$totusinpr0509 + usdat$totusinpn0509

## load in various functions and libraries
source("code/IFoT_repcode_Rfunctions.R")

## set wd for rest of analysis (mostly)
setwd(paste(curcomp, "/paper", sep = ""))


######################################################
## Results described in Ind Frag over Globalization ##
######################################################

# examples 
usdat[usdat$country == "Korea" & usdat$naicsdes == "Aircraft parts",
  c("numfirmlob","numassoclob","numsupfirm","numoppfirm")]
usdat[usdat$country == "Korea" & usdat$naicsdes == "Hardware",
  c("numsupfirm","numoppfirm","assposadcvd")]

# how many industries have a position taken at some point?
(403 - sum(usdat$numassoc[usdat$country == "Korea"] == 0))/403
unique(usdat$naicsdes[usdat$numassoc == 0])
apos <- c(); alob <- c();
for(i in unique(usdat$naicsdes)){ 
  apos <- c(apos, sum(usdat$assposadcvd[usdat$naicsdes == i] != "No position") > 0)
  alob <- c(alob, sum(usdat$lobassoc[usdat$naicsdes == i]) > 0)
}
summary(apos)
summary(alob)
summary(apos | alob)

mean(usdat$assocbudget[usdat$country == "Korea"] > 100000)
mean(usdat$assocstaff[usdat$country == "Korea"] > 5)

## models on mean and standard deviation of tariffs
summary(usdat$agrtaravg)
quantile(usdat$agrtaravg, seq(0,1,.01))
quantile(sqrt(usdat$agrtarvar), seq(0,1,.01))

# table A1
# quantile(usdat$agrtaravg, seq(0,1,.01))
# quantile(usdat$agrtarvar, seq(0,1,.01))

cur <- usdat
mod1 <- glm(log(agrtaravg+1) ~ log(agrtarnumlines+1)*sector + log(sales0509) + sector + diff, data = cur)
mod2 <- glm(sqrt(usdat$agrtarvar) ~ log(agrtarnumlines+1)*sector + log(sales0509) + sector + diff, data = cur)

mods <- list(summary2(mod1), summary2(mod2)) 
vars <- c(rownames(summary2(mod2))[2:9], "(Intercept)") 
tab <- matrix(data = NA, ncol = length(mods), nrow = length(vars)*2)
rownames(tab) <- c(rep(vars, each = 2))
for(i in 1:length(mods)){mod <- mods[[i]]; tabinmod <- unique(rownames(tab)[rownames(tab) %in% rownames(mod)])
  stars <- rep("", nrow(mod)); stars[mod[tabinmod,4] < .05] <- "^{*}"; stars[mod[tabinmod,4] < .005] <- "^{**}";
  stars[mod[tabinmod,4] < .0005] <- "^{***}";  
  tab[rownames(tab) %in% rownames(mod),i] <- c(rbind(paste(myround(mod[tabinmod,1],3), stars, sep = ""), paste("(", myround(mod[tabinmod,2],3), ")", sep = "")))}
rownames(tab) <- c(rbind(vars, "")); rownames(tab)[c(1,3,5,7,9,11,13,15)] <- c("Num. tariff lines", "Manufacturing", "Mining",
  "Sales", "Mod. differentiated", "Differentiated","Tariff lines * Manu.","Tariff lines * Mining") # , "No. assocs.", "No. firms", "4-firm concentration", "20-firm concentration",
N <- c(sum(summary(mod1)$df[2:3]), sum(summary(mod2)$df[2:3]))
N <- paste("\\multicolumn{1}{c}{", N, "}", sep = "")
R2 <- c(myround(pR2(mod1), 2)[6],myround(pR2(mod2), 2)[6])
R2 <- paste("\\multicolumn{1}{c}{", R2, "}", sep = "")
tab <- rbind(tab, c(R2), c(N)); rownames(tab)[c(nrow(tab)-1,nrow(tab))] <- c("pseudo R$^2$", "N")
addtorow <- list(); addtorow$pos <- list(18); addtorow$command <- c(paste0('\\midrule '))
ptable(cbind(rownames(tab), tab), "tables/tarlines.tex")

# fn 23
cor(cbind(usdat$rpimp0509, as.numeric(usdat$diff)), method = "spearman")
cor(na.omit(cbind(usdat$rpimp0509, usdat$elast)), method = "spearman")
cor(cbind(usdat$totusinp0509, as.numeric(usdat$diff)), method = "spearman")
cor(na.omit(cbind(usdat$totusinp0509, usdat$elast)), method = "spearman")

# fn 24
cor(cbind(log(usdat$rpimp0509+1), usdat$nonrpca0509))
cor(cbind(log(usdat$totusinp0509+1), usdat$nonrpca0509))
cor(cbind(log(usdat$rpimp0509+1), log(usdat$totusinp0509+1)))
cor(residuals(lm(cbind(log(usdat$rpimp0509+1), log(usdat$totusinp0509+1)) ~ usdat$country)))

#############################################
## Descriptive stats for outcome variables ##
#############################################

# Table 1: summary statistics
cord <- c("NAFTA","Jordan","Chile","Singapore","Australia","CAFTA","MEFTA","Peru","Latin America","Korea")
csumm <- data.frame("country" = cord, "positions" = NA,"divided" = NA, "divided alt." = NA, "pos. index" = NA, 
  "lobby" = NA, "lobdivindex" = NA, "lobspnddivindex" = NA)
for(i in cord){
  cur <- usdat[usdat$country == i,]
  poskeepers <- (cur$numsupfirm >= 3 | cur$numoppfirm >= 3 | cur$assposadcvd != "No position")
  lobkeepers <- (cur$numfirmlob >= 3 | cur$numassoclob >= 1)
  lobspndkeepers <- (cur$numfirmlob >= 3 | cur$numassoclob >= 1) & is.na(cur$lobspnddivindex) == FALSE
  summ <- c(mean(poskeepers), mean(cur$divided3), mean(cur$divided3[poskeepers]), mean(cur$posdivindex[poskeepers]), 
    mean(lobkeepers), mean(cur$lobdivindex[lobkeepers]), mean(cur$lobspnddivindex[lobspndkeepers]) )
  summ <- substr(myround(summ, 2), 2, nchar(myround(summ, 2))); summ[summ == "NaN"] <- "-"
  csumm[csumm$country == i,2:8] <- summ
}
csumm <- apply(csumm, 2, as.character)
csumm[,1][csumm[,1] == "MEFTA"] <- "Bah/Mor/Oman"
csumm[,1][csumm[,1] == "Latin America"] <- "Col/Pan"
csumm[csumm[,6] == ".00",c(6,7,8)] <- "\\multicolumn{1}{c}{-}"

poskeepers <- (usdat$numsupfirm >= 3 | usdat$numoppfirm >= 3 | usdat$assposadcvd != "No position")
lobkeepers <- (usdat$numfirmlob >= 3 | usdat$numassoclob >= 1)
lobspndkeepers <- (usdat$numfirmlob >= 3 | usdat$numassoclob >= 1) & is.na(usdat$lobspnddivindex) == FALSE
osumm <- c(mean(poskeepers), mean(usdat$divided3), mean(usdat$divided3[poskeepers]), mean(usdat$posdivindex[poskeepers]), 
    mean(lobkeepers), mean(usdat$lobdivindex[lobkeepers]), mean(usdat$lobspnddivindex[lobspndkeepers]) )
osumm <- substr(myround(osumm, 2), 2, nchar(myround(osumm, 2))); osumm <- c("Overall", osumm)
tab <- rbind(csumm, osumm)
addtorow <- list(); addtorow$pos <- list(10); addtorow$command <- c(paste0('\\midrule '))
tab <- tab[,c(1,3,4,5,7,8)]
ptable(tab, "tables/csumm.tex")

###############
## Divisions ##
###############

# basic models on divisions
mod1a <- glm(divided3 ~ log(rpimp0509+1) + diff + nonrpca0509 + log(sales0509), data = usdat, family = binomial)
mod1b <- glm(divided3 ~ log(totusinp0509+1) + diff + nonrpca0509  + log(sales0509), data = usdat, family = binomial)
mod1c <- glm(divided3 ~ log(rpimp0509+1) + log(totusinp0509+1) + diff + nonrpca0509 + log(sales0509), data = usdat, family = binomial)
mod1d <- glm(divided3 ~ diff + nonrpca0509 + log(sales0509), data = cur, family = binomial)
1 - pchisq(mod1d$deviance - mod1c$deviance, df = 2)

# subset to position-takers only
cur <- usdat[(usdat$numsupfirm >=3 | usdat$numoppfirm >=3 | usdat$assposadcvd %in% c("No position") == FALSE),]; nrow(cur)
mod2a <- glm(divided3 ~ log(rpimp0509+1) + diff + nonrpca0509 + log(sales0509), data = cur, family = binomial)
mod2b <- glm(divided3 ~ log(totusinp0509+1) + diff + nonrpca0509  + log(sales0509), data = cur, family = binomial)
mod2c <- glm(divided3 ~ log(rpimp0509+1) + log(totusinp0509+1) + diff + nonrpca0509 + log(sales0509), data = cur, family = binomial)
mod2d <- glm(divided3 ~ diff + nonrpca0509 + log(sales0509), data = cur, family = binomial)
1 - pchisq(mod2d$deviance - mod2c$deviance, df = 2)

cor(log(cur$rpimp0509+1), as.numeric(cur$diff))
cor(log(cur$rpimp0509+1), as.numeric(log(cur$elast)), use = "complete.obs")
cor(log(cur$totusinp0509+1), as.numeric(cur$diff))
cor(log(cur$totusinp0509+1), as.numeric(log(cur$elast)), use = "complete.obs")

# Table 2
mods <- list(summary2(mod1a), summary2(mod1b), summary2(mod2a), summary2(mod2b)) 
vars <- c(rownames(summary2(mod1a))[2],rownames(summary2(mod1b))[2:6]) 
tab <- matrix(data = NA, ncol = length(mods), nrow = length(vars)*2)
rownames(tab) <- c(rep(vars, each = 2))
for(i in 1:length(mods)){
  # i <- 2
  mod <- mods[[i]]; tabinmod <- unique(rownames(tab)[rownames(tab) %in% rownames(mod)])
  stars <- rep("", nrow(mod[tabinmod,])); stars[mod[tabinmod,4] < .025] <- "^{*}"; stars[mod[tabinmod,4] < .005] <- "^{**}";
  stars[mod[tabinmod,4] < .0005] <- "^{***}"; 
  tab[rownames(tab) %in% rownames(mod),i] <- c(rbind(paste(myround(mod[tabinmod,1],2), stars, sep = ""), paste("(", myround(mod[tabinmod,2],2), ")", sep = "")))}
rownames(tab) <- c(rbind(vars, "")); rownames(tab)[c(1,3,5,7,9,11)] <- c("Related-party imports", "Imported inputs",
  "Mod. differentiated", "Differentiated", "Comp. Adv.", "Sales") # , "No. assocs.", "No. firms", "4-firm concentration", "20-firm concentration",
N <- c(sum(summary(mod1a)$df[2:3]), sum(summary(mod1b)$df[2:3]),sum(summary(mod2a)$df[2:3]),sum(summary(mod2b)$df[2:3]))
N <- paste("\\multicolumn{1}{c}{", N, "}", sep = "")
R2 <- c(myround(pR2(mod1a), 2)[6],myround(pR2(mod1b), 2)[6],myround(pR2(mod2a), 2)[6],myround(pR2(mod2b), 2)[6])
R2 <- paste("\\multicolumn{1}{c}{", R2, "}", sep = "")
tab <- rbind(tab, c(R2), c(N)); rownames(tab)[c(nrow(tab)-1,nrow(tab))] <- c("pseudo R$^2$", "N")
addtorow <- list(); addtorow$pos <- list(12); addtorow$command <- c(paste0('\\midrule '))
ptable(cbind(rownames(tab), tab), "tables/div.tex")

# simulations
cur <- usdat
model = divided3 ~ log(rpimp0509+1) + diff + nonrpca0509  + log(sales0509) 
csvar = "rpimp0509"; csquant = c(.25, .75); cs1 <- logcs2(model, usdat, csvar, csquant); cs1
csvar = "diff"; csquant = c("Homogeneous", "Mod. differentiated"); cs3a <- logcs2(model, usdat, csvar, csquant); cs3a
csvar = "diff"; csquant = c("Homogeneous", "Differentiated"); cs3b <- logcs2(model, usdat, csvar, csquant); cs3b

model = divided3 ~ log(totusinp0509+1) + diff + nonrpca0509  + log(sales0509) 
csvar = "totusinp0509"; csquant = c(.25, .75); cs2 <- logcs2(model, usdat, csvar, csquant); cs2

model = divided3 ~ log(rpimp0509+1) + log(totusinp0509+1) + diff + nonrpca0509  + log(sales0509) 
mod <- glm(model, data = usdat, family = "binomial"); vcmat <- vcov(mod); 
betas <- mvrnorm(nrow(usdat), mu = coef(mod), Sigma = vcmat); 
mm1 <- apply(model.matrix(mod), 2, median); mm0 <- mm1; 
mm0[c("diffMod. differentiated")] <- 0; mm0[c("diffDifferentiated")] <- 0; mm0[c("log(rpimp0509 + 1)")] <- quantile(log(cur$rpimp0509+1), .1); mm0[c("log(totusinp0509 + 1)")] <- quantile(log(cur$totusinp0509+1), .1);
mm1[c("diffMod. differentiated")] <- 0; mm1[c("diffDifferentiated")] <- 1; mm1[c("log(rpimp0509 + 1)")] <- quantile(log(cur$rpimp0509+1), .0); mm1[c("log(totusinp0509 + 1)")] <- quantile(log(cur$totusinp0509+1), .9);
sup0 <- c(); sup1 <- c()
for(i in 1:nrow(betas)){cf0 <- mm0%*%betas[i,]; sup0[i] <- median(inv.logit(cf0)); cf1 <- mm1%*%betas[i,]; sup1[i] <- median(inv.logit(cf1))}
infALL <- c(myround(median(sup1), 3), myround(median(sup0), 3), myround(quantile((sup1-sup0), .5), 3), 
  paste("[", myround(quantile((sup1-sup0), .025), 3), ", ", myround(quantile((sup1-sup0), .975), 3), "]", sep = ""))

#####################
## Lobbying splits ##
#####################

# positions fragmentation
cur <- usdat[(usdat$numsupfirm >=3 | usdat$numoppfirm >=3 | usdat$assposadcvd %in% c("No position") == FALSE),]; nrow(cur)
mod1a <- glm(I(100*posdivindex) ~ log(rpimp0509+1) + diff + nonrpca0509 + log(sales0509), data = cur)
mod1b <- glm(I(100*posdivindex) ~ log(totusinp0509+1) + diff + nonrpca0509 + log(sales0509), data = cur)
mod1c <- glm(I(100*posdivindex) ~ log(rpimp0509+1) + log(totusinp0509+1) + diff + nonrpca0509 + log(sales0509), data = cur)
mod1d <- glm(I(100*posdivindex) ~ diff + nonrpca0509 + log(sales0509), data = cur)
1 - pchisq(mod1d$deviance - mod1c$deviance, df = 2)
R2 <- c(myround(pR2(mod1a), 2)[6], myround(pR2(mod1b), 2)[6])

names(usdat)
summary(lm(I(diff == "Differentiated") ~ log(totusinpn0509+1) + log(totusinpw0509+1), data = usdat))
summary(lm(I(diff == "Homogeneous") ~ log(totusinpn0509+1) + log(totusinpw0509+1), data = usdat))

# lobbying fragmentation
cur <- usdat[usdat$numfirmlob >= 3 | usdat$numassoclob >= 1,]; nrow(cur)
mod2a <- glm(I(100*lobdivindex) ~ log(rpimp0509+1) + diff + nonrpca0509 + log(sales0509), data = cur)
mod2b <- glm(I(100*lobdivindex) ~ log(totusinp0509+1) + diff + nonrpca0509 + log(sales0509), data = cur)
mod2c <- glm(I(100*lobdivindex) ~ log(rpimp0509+1) + log(totusinp0509+1) + diff + nonrpca0509 + log(sales0509), data = cur)
mod2d <- glm(I(100*lobdivindex) ~ diff + nonrpca0509 + log(sales0509), data = cur)
1 - pchisq(mod2d$deviance - mod2c$deviance, df = 2)
R2 <- c(R2, myround(pR2(mod2a), 2)[6], myround(pR2(mod2b), 2)[6])

# lobby spending fragmentation 
cur <- usdat[usdat$numfirmlob >= 3 | usdat$numassoclob >= 1,]; nrow(cur)
mod3a <- glm(I(100*lobspnddivindex) ~ log(rpimp0509+1) + diff + nonrpca0509 + log(sales0509), data = cur)
mod3b <- glm(I(100*lobspnddivindex) ~ log(totusinp0509+1) + diff + nonrpca0509 + log(sales0509), data = cur)
mod3c <- glm(I(100*lobspnddivindex) ~ log(rpimp0509+1) + log(totusinp0509+1) + diff + nonrpca0509 + log(sales0509), data = cur)
mod3d <- glm(I(100*lobspnddivindex) ~ diff + nonrpca0509 + log(sales0509) + primary, data = cur)
1 - pchisq(mod3d$deviance - mod3c$deviance, df = 2)
R2 <- c(R2, myround(pR2(mod3a), 2)[6], myround(pR2(mod3b), 2)[6])

# Table 3
mods <- list(summary2(mod1a), summary2(mod1b), summary2(mod2a), summary2(mod2b), summary2(mod3a), summary2(mod3b))
vars <- c(rownames(summary2(mod3a))[2],rownames(summary2(mod3b))[2:6]) 
tab <- matrix(data = NA, ncol = length(mods), nrow = length(vars)*2)
rownames(tab) <- c(rep(vars, each = 2))
for(i in 1:length(mods)){mod <- mods[[i]]; tabinmod <- unique(rownames(tab)[rownames(tab) %in% rownames(mod)])
  stars <- rep("", nrow(mod[tabinmod,])); stars[mod[tabinmod,4] < .025] <- "^{*}"; stars[mod[tabinmod,4] < .005] <- "^{**}";
  stars[mod[tabinmod,4] < .0005] <- "^{***}";  
  tab[rownames(tab) %in% rownames(mod),i] <- c(rbind(paste(myround(mod[tabinmod,1],2), stars, sep = ""), paste("(", myround(mod[tabinmod,2],2), ")", sep = "")))}
rownames(tab) <- c(rbind(vars, "")); rownames(tab)[c(1,3,5,7,9,11)] <- c("Related-party imports","Imported inputs",
  "Mod. differentiated", "Differentiated", "Comp. Adv.", "Sales") # , "No. assocs.", "No. firms", "4-firm concentration", "20-firm concentration", 
N <- c(sum(summary(mod1a)$df[2:3]), sum(summary(mod1b)$df[2:3]), sum(summary(mod2a)$df[2:3]), sum(summary(mod2b)$df[2:3]), sum(summary(mod3a)$df[2:3]), sum(summary(mod3b)$df[2:3]))
N <- paste("\\multicolumn{1}{c}{", N, "}", sep = "")
R2 <- paste("\\multicolumn{1}{c}{", R2, "}", sep = "")
tab <- rbind(tab, c(R2), c(N)); rownames(tab)[c(nrow(tab)-1,nrow(tab))] <- c("pseudo R$^2$", "N")
addtorow <- list(); addtorow$pos <- list(12); addtorow$command <- c(paste0('\\midrule '))
ptable(cbind(rownames(tab), tab), "tables/divindices.tex")

# simulations
cur <- usdat[(usdat$numsupfirm >= 3 | usdat$numoppfirm >= 3 | (usdat$assposadcvd %in% c("No position") == FALSE)),]; nrow(cur) # usdat[usdat$indposadcvd != "No position",]; 
model = posdivindex ~ log(rpimp0509+1) + diff + nonrpca0509  + log(sales0509) 
csvar = "rpimp0509"; csquant = c(.25, .75); cs1 <- lmcs2(model, cur, csvar, csquant); cs1 <- cs1[[1]]
csvar = "diff"; csquant = c("Homogeneous", "Mod. differentiated"); cs3a <- lmcs2(model, cur, csvar, csquant); cs3a <- cs3a[[1]]
csvar = "diff"; csquant = c("Homogeneous", "Differentiated"); cs3b <- lmcs2(model, cur, csvar, csquant); cs3b <- cs3b[[1]]

model = posdivindex ~ log(totusinp0509+1) + diff + nonrpca0509  + log(sales0509) 
csvar = "totusinp0509"; csquant = c(.25, .75); cs2 <- lmcs2(model, cur, csvar, csquant); cs2 <- cs2[[1]]
inf1 <- c(cs1, cs2, cs3a, cs3b)

model = posdivindex ~ log(rpimp0509+1) + log(totusinp0509+1) + diff + nonrpca0509 + log(sales0509) # + country2
mod <- glm(model, data = cur); vcmat <- vcov(mod); 
betas <- mvrnorm(nrow(cur), mu = coef(mod), Sigma = vcmat); 
mm1 <- apply(model.matrix(mod), 2, median); mm0 <- mm1; 
mm0[c("diffMod. differentiated")] <- 0; mm0[c("diffDifferentiated")] <- 0; mm0[c("log(rpimp0509 + 1)")] <- quantile(log(cur$rpimp0509+1), .1); mm0[c("log(totusinp0509 + 1)")] <- quantile(log(cur$totusinp0509+1), .1);
mm1[c("diffMod. differentiated")] <- 0; mm1[c("diffDifferentiated")] <- 1; mm1[c("log(rpimp0509 + 1)")] <- quantile(log(cur$rpimp0509+1), .9); mm1[c("log(totusinp0509 + 1)")] <- quantile(log(cur$totusinp0509+1), .9);
sup0 <- c(); sup1 <- c()
for(i in 1:nrow(betas)){cf0 <- mm0%*%betas[i,]; sup0[i] <- median(cf0); cf1 <- mm1%*%betas[i,]; sup1[i] <- median(cf1)}
infALL <- c(myround(median(sup1), 3), myround(median(sup0), 3), myround(quantile((sup1-sup0), .5), 3), 
  paste("[", myround(quantile((sup1-sup0), .025), 3), ", ", myround(quantile((sup1-sup0), .975), 3), "]", sep = ""))

cur <- usdat[usdat$numfirmlob >= 3 | usdat$numassoclob >= 1,]; nrow(cur)
model = lobdivindex ~ log(rpimp0509+1) + diff + nonrpca0509  + log(sales0509) # + country2
csvar = "rpimp0509"; csquant = c(.25, .75); cs1 <- lmcs2(model, cur, csvar, csquant); cs1 <- cs1[[1]]
csvar = "diff"; csquant = c("Homogeneous", "Mod. differentiated"); cs3a <- lmcs2(model, cur, csvar, csquant); cs3a <- cs3a[[1]]
csvar = "diff"; csquant = c("Homogeneous", "Differentiated"); cs3b <- lmcs2(model, cur, csvar, csquant); cs3b <- cs3b[[1]]

model = lobdivindex ~ log(totusinp0509+1) + diff + nonrpca0509  + log(sales0509) # + country2
csvar = "totusinp0509"; csquant = c(.25, .75); cs2 <- lmcs2(model, cur, csvar, csquant); cs2 <- cs2[[1]]
inf2 <- c(cs1, cs2, cs3a, cs3b)

model = lobdivindex ~ log(rpimp0509+1) + log(totusinp0509+1) + diff + nonrpca0509  + log(sales0509) # + country2
mod <- glm(model, data = cur); vcmat <- vcov(mod); 
betas <- mvrnorm(nrow(cur), mu = coef(mod), Sigma = vcmat); 
mm1 <- apply(model.matrix(mod), 2, median); mm0 <- mm1; 
mm0[c("diffMod. differentiated")] <- 0; mm0[c("diffDifferentiated")] <- 0; mm0[c("log(rpimp0509 + 1)")] <- quantile(log(cur$rpimp0509+1), .1); mm0[c("log(totusinp0509 + 1)")] <- quantile(log(cur$totusinp0509+1), .1);
mm1[c("diffMod. differentiated")] <- 0; mm1[c("diffDifferentiated")] <- 1; mm1[c("log(rpimp0509 + 1)")] <- quantile(log(cur$rpimp0509+1), .9); mm1[c("log(totusinp0509 + 1)")] <- quantile(log(cur$totusinp0509+1), .9);
sup0 <- c(); sup1 <- c()
for(i in 1:nrow(betas)){cf0 <- mm0%*%betas[i,]; sup0[i] <- median(cf0); cf1 <- mm1%*%betas[i,]; sup1[i] <- median(cf1)}
infALL <- c(myround(median(sup1), 3), myround(median(sup0), 3), myround(quantile((sup1-sup0), .5), 3), 
  paste("[", myround(quantile((sup1-sup0), .025), 3), ", ", myround(quantile((sup1-sup0), .975), 3), "]", sep = ""))

cur <- usdat[usdat$numfirmlob >= 3 | usdat$numassoclob >= 1,]; nrow(cur)
cur <- cur[is.na(cur$lobspnddivindex) == FALSE,]; nrow(cur)
model = lobspnddivindex ~ log(rpimp0509+1) + log(totusinp0509+1) + diff + nonrpca0509  + log(sales0509) # + country2
csvar = "rpimp0509"; csquant = c(.25, .75); cs1 <- lmcs2(model, cur, csvar, csquant); cs1 <- cs1[[1]]
csvar = "totusinp0509"; csquant = c(.25, .75); cs2 <- lmcs2(model, cur, csvar, csquant); cs2 <- cs2[[1]]
csvar = "diff"; csquant = c("Homogeneous", "Mod. differentiated"); cs3a <- lmcs2(model, cur, csvar, csquant); cs3a <- cs3a[[1]]
csvar = "diff"; csquant = c("Homogeneous", "Differentiated"); cs3b <- lmcs2(model, cur, csvar, csquant); cs3b <- cs3b[[1]]
inf3 <- c(cs1, cs2, cs3a, cs3b)

mod <- glm(model, data = cur); vcmat <- vcov(mod); 
betas <- mvrnorm(nrow(cur), mu = coef(mod), Sigma = vcmat); 
mm1 <- apply(model.matrix(mod), 2, median); mm0 <- mm1; 
mm0[c("diffMod. differentiated")] <- 0; mm0[c("diffDifferentiated")] <- 0; mm0[c("log(rpimp0509 + 1)")] <- quantile(log(cur$rpimp0509+1), .25); mm0[c("log(totusinp0509 + 1)")] <- quantile(log(cur$totusinp0509+1), .25);
mm1[c("diffMod. differentiated")] <- 0; mm1[c("diffDifferentiated")] <- 1; mm1[c("log(rpimp0509 + 1)")] <- quantile(log(cur$rpimp0509+1), .75); mm1[c("log(totusinp0509 + 1)")] <- quantile(log(cur$totusinp0509+1), .75);
sup0 <- c(); sup1 <- c()
for(i in 1:nrow(betas)){cf0 <- mm0%*%betas[i,]; sup0[i] <- median(cf0); cf1 <- mm1%*%betas[i,]; sup1[i] <- median(cf1)}
infALL <- c(myround(median(sup1), 3), myround(median(sup0), 3), myround(quantile((sup1-sup0), .5), 3), 
  paste("[", myround(quantile((sup1-sup0), .025), 3), ", ", myround(quantile((sup1-sup0), .975), 3), "]", sep = ""))


#############
## Figures ##
#############

# pdf("figs/posdivfig.pdf", height = 6, width = 4)
tiff("figs/posdivfig.tiff", width = 4, height = 6, units = 'in', res = 600)

# contr <- .4
t.cex <- .9
par(mfrow=c(4,2), mai = c(.12,.025,.12,0), oma = c(.3,.2,0,0)) # bottom, left, top, right
opar <- par(lwd=.6)

cur <- usdat[(usdat$numsupfirm >=3 | usdat$numoppfirm >=3 | usdat$assposadcvd %in% c("No position") == FALSE),]; nrow(cur)
hist(cur$posdivindex[cur$rpimp0509 < median(cur$rpimp0509)], probability = T, xlab = "", xlim = c(-.08,1), ylim = c(0,4.5), axes = F, main = "")
text("Density", x = -.08, y = 2, srt = 90, cex = t.cex); text("Low related-party imports industries", x = .5, y = 4.05, cex = t.cex)
axis(1, at = c(0,1), labels = c("0","1"), padj = -2.2, cex.axis = .6, lwd = .5, tck = -.015)
hist(cur$posdivindex[cur$rpimp0509 >= median(cur$rpimp0509)], probability = T, xlab = "", xlim = c(-.08,1), ylim = c(0,4.5), axes = F, main = "")
text("High related-party imports industries", x = .5, y = 4.05, cex = t.cex)
axis(1, at = c(0,1), labels = c("0","1"), padj = -2.2, cex.axis = .6, lwd = .5, tck = -.015)

hist(cur$posdivindex[cur$totusinp0509 < median(cur$totusinp0509)], probability = T, xlab = "", xlim = c(-.08,1), ylim = c(0,4.5), axes = F, main = "")
text("Density", x = -.08, y = 2, srt = 90, cex = t.cex); text("Low inputs industries", x = .5, y = 4.05, cex = t.cex)
axis(1, at = c(0,1), labels = c("0","1"), padj = -2.2, cex.axis = .6, lwd = .5, tck = -.015)
hist(cur$posdivindex[cur$totusinp0509 >= median(cur$totusinp0509)], probability = T, xlab = "", xlim = c(-.08,1), ylim = c(0,4.5), axes = F, main = "")
text("High inputs industries", x = .5, y = 4.05, cex = t.cex)
axis(1, at = c(0,1), labels = c("0","1"), padj = -2.2, cex.axis = .6, lwd = .5, tck = -.015)

hist(cur$posdivindex[cur$diff == "Homogeneous"], probability = T, xlab = "", xlim = c(-.08,1), ylim = c(0,4.5), axes = F, main = "", lwd = .5)
text("Density", x = -.08, y = 2, srt = 90, cex = t.cex); text("Homogeneous goods", x = .5, y = 4.05, cex = t.cex)
axis(1, at = c(0,1), labels = c("0","1"), padj = -2.2, cex.axis = .6, lwd = .5, tck = -.015)
hist(cur$posdivindex[cur$diff != "Homogeneous"], probability = T, xlab = "", xlim = c(-.08,1), ylim = c(0,4.5), axes = F, main = "", lwd = .5)
text("Differentiated goods", x = .5, y = 4.05, cex = t.cex)
axis(1, at = c(0,1), labels = c("0","1"), padj = -2.2, cex.axis = .6, lwd = .5, tck = -.015)

hist(cur$posdivindex[cur$diff == "Homogeneous" & cur$rpimp0509 < median(cur$rpimp0509) & cur$totusinp0509 < median(cur$totusinp0509)], probability = T, xlab = "", xlim = c(-.08,1), ylim = c(0,6), axes = F, main = "")
text("Density", x = -.08, y = 2.75, srt = 90, cex = t.cex); text("Homogeneous, low FDI, low inputs", x = .5, y = 6, cex = t.cex)
axis(1, at = c(0,1), labels = c("0","1"), padj = -2.2, cex.axis = .6, lwd = .5, tck = -.015)
hist(cur$posdivindex[cur$diff != "Homogeneous" & cur$rpimp0509 >= median(cur$rpimp0509) & cur$totusinp0509 >= median(cur$totusinp0509)], probability = T, xlab = "", xlim = c(-.08,1), ylim = c(0,6), axes = F, main = "")
text("Differentiated, high FDI, high inputs", x = .5, y = 6, cex = t.cex)
axis(1, at = c(0,1), labels = c("0","1"), padj = -2.2, cex.axis = .6, lwd = .5, tck = -.015)
dev.off()

# pdf("figs/lobfig.pdf", height = 2.2, width = 5)
tiff("figs/lobfig.tiff", width = 5, height = 2.2, units = 'in', res = 600)
cur <- usdat[usdat$numfirmlob >= 3 | usdat$numassoclob >= 1,]; nrow(cur)

t.cex <- .45
par(mfrow=c(1,1), mai = c(.05,.025,.12,0)) # bottom, left, top, right
plot(-1,-1, xlim = c(0,1.4), ylim = c(.5,1), axes = FALSE, xlab = "", ylab = "")
a0 <- mean(cur$lobdivindex[cur$totusinp0509 < median(cur$totusinp0509)])
a1 <- mean(cur$lobdivindex[cur$totusinp0509 > median(cur$totusinp0509)], na.rm = TRUE)
segments(.25, .88, a0/3+.25, .88, lwd = .99, lty = "dashed"); segments(.25, .85, a1/3+.25 , .85, lwd = .99)
text("Imported", y=.875, x = -.03, cex = .5, pos = 4); text("  inputs", y=.85, x = -.03, cex = .5, pos = 4); 
text("Low", y=.88, x = .16, cex = t.cex, pos = 4); text("High", y=.85, x = .16, cex = t.cex, pos = 4)
text("Lobbying index", y=.91, x = .21, cex = .6, pos = 4); 

a0 <- mean(cur$lobdivindex[cur$rpimp0509  < median(cur$rpimp0509 )])
a1 <- mean(cur$lobdivindex[cur$rpimp0509  > median(cur$rpimp0509 )], na.rm = TRUE)
segments(.25, .78, a0/3+.25, .78, lwd = .99, lty = "dashed"); segments(.25, .75, a1/3+.25 , .75, lwd = .99)
text("Related-party", y=.775, x = -.03, cex = .5, pos = 4); text("  imports", y=.75, x = -.03, cex = .5, pos = 4); 
text("Low", y=.78, x = .16, cex = t.cex, pos = 4); text("High", y=.75, x = .16, cex = t.cex, pos = 4)

a0 <- mean(cur$lobdivindex[cur$diff == "Homogeneous"])
a1 <- mean(cur$lobdivindex[cur$diff != "Homogeneous"], na.rm = TRUE)
segments(.25, .68, a0/3+.25, .68, lwd = .99, lty = "dashed"); segments(.25, .65, a1/3+.25 , .65, lwd = .99)
text("Product", y=.675,  x = -.03, cex = .5, pos = 4); text("  different.", y=.65,  x = -.03, cex = .5, pos = 4); 
text("Low", y=.68, x = .16, cex = t.cex, pos = 4); text("High", y=.65, x = .16, cex = t.cex, pos = 4)

a0 <- mean(cur$lobdivindex[cur$diff == "Homogeneous" & cur$rpimp0509  < median(cur$rpimp0509) & cur$totusinp0509 < median(cur$totusinp0509)])
a1 <- mean(cur$lobdivindex[cur$diff != "Homogeneous" & cur$rpimp0509  > median(cur$rpimp0509) & cur$totusinp0509 > median(cur$totusinp0509)], na.rm = TRUE)
segments(.25, .58, a0/3+.25, .58, lwd = .99, lty = "dashed"); segments(.25, .55, a1/3+.25 , .55, lwd = .99)
text("Inputs, RPI", y=.575,  x = -.03, cex = .5, pos = 4); text("  and different.", y=.55, x = -.03, cex = .5, pos = 4); 
text("Low", y=.58, x = .16, cex = t.cex, pos = 4); text("High", y=.55, x = .16, cex = t.cex, pos = 4)

segments(.25, .52, (.8)/3 + .25, .52, lwd = .5)
segments(.25 + seq(0,.8, .2)/3, .52, .25 + seq(0,.8, .2)/3, .518, lwd = .5)
text(c(".0",".2",".4",".6",".8"), x = .25 + seq(0,.8, .2)/3, y = .5, cex =.3)

a0 <- mean(cur$lobspnddivindex[cur$rpimp0509 < median(cur$rpimp0509 )])
a1 <- mean(cur$lobspnddivindex[cur$rpimp0509 > median(cur$rpimp0509 )], na.rm = TRUE)
segments(.65, .88, a0/3+.65, .88, lwd = .99, lty = "dashed"); segments(.65, .85, a1/3+.65 , .85, lwd = .99)
text("Low", y=.88, x = .56, cex = t.cex, pos = 4); text("High", y=.85, x = .56, cex = t.cex, pos = 4)
text("Lobby spending index", y=.91, x = .6, cex = .6, pos = 4); 

a0 <- mean(cur$lobspnddivindex[cur$totusinp0509 < median(cur$totusinp0509)])
a1 <- mean(cur$lobspnddivindex[cur$totusinp0509 > median(cur$totusinp0509)], na.rm = TRUE)
text("Low", y=.78, x = .56, cex = t.cex, pos = 4); text("High", y=.75, x = .56, cex = t.cex, pos = 4)
segments(.65, .78, a0/3+.65, .78, lwd = .99, lty = "dashed"); segments(.65, .75, a1/3+.65 , .75, lwd = .99)

a0 <- mean(cur$lobspnddivindex[cur$diff == "Homogeneous"])
a1 <- mean(cur$lobspnddivindex[cur$diff != "Homogeneous"], na.rm = TRUE)
text("Low", y=.68, x = .56, cex = t.cex, pos = 4); text("High", y=.65, x = .56, cex = t.cex, pos = 4)
segments(.65, .68, a0/3+.65, .68, lwd = .99, lty = "dashed"); segments(.65, .65, a1/3+.65, .65, lwd = .99)

a0 <- mean(cur$lobspnddivindex[cur$diff == "Homogeneous" & cur$rpimp0509  < median(cur$rpimp0509) & cur$totusinp0509 < median(cur$totusinp0509)])
a1 <- mean(cur$lobspnddivindex[cur$diff != "Homogeneous" & cur$rpimp0509  > median(cur$rpimp0509) & cur$totusinp0509 > median(cur$totusinp0509)], na.rm = TRUE)
text("Low", y=.58, x = .56, cex = t.cex, pos = 4); text("High", y=.55, x = .56, cex = t.cex, pos = 4)
segments(.65, .58, a0/3+.65, .58, lwd = .99, lty = "dashed"); segments(.65, .55, a1/3+.65, .55, lwd = .99)

segments(.65, .52, (.8)/3 + .65, .52, lwd = .5)
segments(.65 + seq(0,.8, .2)/3, .52, .65 + seq(0,.8, .2)/3, .518, lwd = .5)
text(c(".0",".2",".4",".6",".8"), x = .65 + seq(0,.8, .2)/3, y = .5, cex =.3)

cur <- usdat[usdat$numsupfirm >= 3 | usdat$numoppfirm >= 3 | usdat$assposadcvd %in% c("Favor","Oppose","Divided"),]; nrow(cur)

a0 <- mean(cur$divided3[cur$rpimp0509 < median(cur$rpimp0509 )])
a1 <- mean(cur$divided3[cur$rpimp0509 > median(cur$rpimp0509 )], na.rm = TRUE)
segments(1.05, .88, 3*a0+1.05, .88, lwd = .99, lty = "dashed"); segments(1.05, .85, 3*a1+1.05 , .85, lwd = .99)
text("Low", y=.88, x = .96, cex = t.cex, pos = 4); text("High", y=.85, x = .96, cex = t.cex, pos = 4)
text("Industry divisions", y=.91, x = 1, cex = .6, pos = 4); 

a0 <- mean(cur$divided3[cur$totusinp0509 < median(cur$totusinp0509)])
a1 <- mean(cur$divided3[cur$totusinp0509 > median(cur$totusinp0509)], na.rm = TRUE)
text("Low", y=.78, x = .96, cex = t.cex, pos = 4); text("High", y=.75, x = .96, cex = t.cex, pos = 4)
segments(1.05, .78, 3*a0+1.05, .78, lwd = .99, lty = "dashed"); segments(1.05, .75, 3*a1+1.05 , .75, lwd = .99)

a0 <- mean(cur$divided3[cur$diff == "Homogeneous"])
a1 <- mean(cur$divided3[cur$diff != "Homogeneous"], na.rm = TRUE)
text("Low", y=.68, x = .96, cex = t.cex, pos = 4); text("High", y=.65, x = .96, cex = t.cex, pos = 4)
segments(1.05, .68, 3*a0+1.05, .68, lwd = .99, lty = "dashed"); segments(1.05, .65, 3*a1+1.05, .65, lwd = .99)

a0 <- mean(cur$divided3[cur$diff == "Homogeneous" & cur$rpimp0509  < median(cur$rpimp0509) & cur$totusinp0509 < median(cur$totusinp0509)])
a1 <- mean(cur$divided3[cur$diff != "Homogeneous" & cur$rpimp0509  > median(cur$rpimp0509) & cur$totusinp0509 > median(cur$totusinp0509)], na.rm = TRUE)
text("Low", y=.58, x = .96, cex = t.cex, pos = 4); text("High", y=.55, x = .96, cex = t.cex, pos = 4)
segments(1.05, .58, 3*a0+1.05, .58, lwd = .99, lty = "dashed"); segments(1.05, .55, 3*a1+1.05, .55, lwd = .99)

segments(1.05, .52, 3*(.10) + 1.05, .52, lwd = .5)
segments(1.05 + seq(0,.10, .02)*3, .52, 1.05 + seq(0,.10, .02)*3, .518, lwd = .5)
text(c(".00",".02",".04",".06",".08",".10"), x = 1.05 + seq(0,.10, .02)*3, y = .5, cex =.3)
dev.off()

